library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(spotifyr) # spotify api
library(tidytext)
##
## Attaching package: 'tidytext'
##
## The following object is masked from 'package:spotifyr':
##
## tidy
library(rgenius) # genius api
library(wordcloud)
## Loading required package: RColorBrewer
library(wordcloud2)
# Load ggridges package
library(ggridges)
devtools::install_github('charlie86/spotifyr') # spotify
## Skipping install of 'spotifyr' from a github remote, the SHA1 (45ffa4ee) has not changed since last install.
## Use `force = TRUE` to force installation
# Authenticate against the Spotify Web API.
# Credentials are read from the environment (for example from ~/.Renviron)
# instead of being written into the script.
# SECURITY: the client id, client secret and Genius token that used to be
# embedded here have been exposed in source and should be revoked and rotated.
spotify_client_id <- Sys.getenv("SPOTIFY_CLIENT_ID")
spotify_client_secret <- Sys.getenv("SPOTIFY_CLIENT_SECRET")
if (!nzchar(spotify_client_id) || !nzchar(spotify_client_secret)) {
  stop(
    "Set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET before running this script.",
    call. = FALSE
  )
}
access_token <- get_spotify_access_token(
  client_id = spotify_client_id,
  client_secret = spotify_client_secret
)

# Genius API: the token is expected in the GENIUS_API_TOKEN environment
# variable. Only a warning is raised because no Genius request is made in the
# visible part of this script.
if (!nzchar(Sys.getenv("GENIUS_API_TOKEN"))) {
  warning(
    "GENIUS_API_TOKEN is not set; Genius API requests may not be authorised.",
    call. = FALSE
  )
}
# Harry Styles: audio features plus the artist's album listing.
# The token created during authentication is passed explicitly, exactly as the
# later artist sections do, instead of relying on each call's default.
Harry_styles <- get_artist_audio_features(
  "Harry Styles",
  authorization = access_token
)
Harry <- get_artist_albums(
  "6KImCVD70vtIoJWnq6nGn3",
  authorization = access_token
)

# Fetch the detailed album record for every listed album id. The result is a
# list keyed by album id; unique() keeps one entry per id, which is what
# assigning by name inside a loop would also produce.
album_list <- lapply(
  setNames(nm = unique(Harry$id)),
  function(album_id) get_albums(album_id, authorization = access_token)
)

# Stack the per-album data frames into a single data frame
albums_df <- bind_rows(album_list)
# Column names of the audio-features table before the album key is renamed
names(Harry_styles)
## [1] "artist_name" "artist_id"
## [3] "album_id" "album_type"
## [5] "album_images" "album_release_date"
## [7] "album_release_year" "album_release_date_precision"
## [9] "danceability" "energy"
## [11] "key" "loudness"
## [13] "mode" "speechiness"
## [15] "acousticness" "instrumentalness"
## [17] "liveness" "valence"
## [19] "tempo" "track_id"
## [21] "analysis_url" "time_signature"
## [23] "artists" "available_markets"
## [25] "disc_number" "duration_ms"
## [27] "explicit" "track_href"
## [29] "is_local" "track_name"
## [31] "track_preview_url" "track_number"
## [33] "type" "track_uri"
## [35] "external_urls.spotify" "album_name"
## [37] "key_name" "mode_name"
## [39] "key_mode"
# Rename the album key to `id` so it matches the key column of the album tables
Harry_styles <- dplyr::rename(Harry_styles, id = album_id)
names(Harry_styles)
## [1] "artist_name" "artist_id"
## [3] "id" "album_type"
## [5] "album_images" "album_release_date"
## [7] "album_release_year" "album_release_date_precision"
## [9] "danceability" "energy"
## [11] "key" "loudness"
## [13] "mode" "speechiness"
## [15] "acousticness" "instrumentalness"
## [17] "liveness" "valence"
## [19] "tempo" "track_id"
## [21] "analysis_url" "time_signature"
## [23] "artists" "available_markets"
## [25] "disc_number" "duration_ms"
## [27] "explicit" "track_href"
## [29] "is_local" "track_name"
## [31] "track_preview_url" "track_number"
## [33] "type" "track_uri"
## [35] "external_urls.spotify" "album_name"
## [37] "key_name" "mode_name"
## [39] "key_mode"
# Column names of the artist's album listing
names(Harry)
## [1] "album_group" "album_type" "artists"
## [4] "available_markets" "href" "id"
## [7] "images" "name" "release_date"
## [10] "release_date_precision" "total_tracks" "type"
## [13] "uri" "external_urls.spotify"
# Column names of the detailed album records
names(albums_df)
## [1] "album_group" "album_type" "artists"
## [4] "available_markets" "copyrights" "genres"
## [7] "href" "id" "images"
## [10] "label" "name" "popularity"
## [13] "release_date" "release_date_precision" "total_tracks"
## [16] "type" "uri" "external_ids.upc"
## [19] "external_urls.spotify" "tracks.href" "tracks.items"
## [22] "tracks.limit" "tracks.next" "tracks.offset"
## [25] "tracks.previous" "tracks.total"
# Combine the three tables on the album id: the album listing, the detailed
# album records, and finally the audio-features table. Full joins keep rows
# that have no match on the other side.
merged_df <- Harry %>%
  full_join(albums_df, by = "id") %>%
  full_join(Harry_styles, by = "id")
# Inspect the combined data frame
merged_df
# Drop the duplicated join columns (.x / .y suffixes) and the URL, market and
# paging fields that are not used in the analysis below.
df <- merged_df %>%
  select(-c(
    name.x, release_date.x, total_tracks.x, uri.x, type.x,
    artists.y, album_group.x, album_type.x, artists.x, images.x,
    external_ids.upc, available_markets.x, href.x,
    external_urls.spotify.x, release_date_precision.x,
    album_group.y, album_type.y, available_markets.y, copyrights,
    href.y, images.y, name.y, release_date.y,
    release_date_precision.y, total_tracks.y, type.y, uri.y,
    external_urls.spotify.y,
    tracks.href, tracks.limit, tracks.next, tracks.offset, tracks.previous,
    analysis_url, available_markets, explicit, disc_number, track_href,
    is_local, track_preview_url, type, track_uri, external_urls.spotify
  ))
head(df, 5)
names(df)
## [1] "id" "genres"
## [3] "label" "popularity"
## [5] "tracks.items" "tracks.total"
## [7] "artist_name" "artist_id"
## [9] "album_type" "album_images"
## [11] "album_release_date" "album_release_year"
## [13] "album_release_date_precision" "danceability"
## [15] "energy" "key"
## [17] "loudness" "mode"
## [19] "speechiness" "acousticness"
## [21] "instrumentalness" "liveness"
## [23] "valence" "tempo"
## [25] "track_id" "time_signature"
## [27] "artists" "duration_ms"
## [29] "track_name" "track_number"
## [31] "album_name" "key_name"
## [33] "mode_name" "key_mode"
# Group by album id and order the rows from most to least popular.
# Note: arrange() ignores grouping unless .by_group = TRUE, so this is one
# overall ordering, not a sort within each album; the result stays grouped.
ged_df <- arrange(group_by(df, id), desc(popularity))
# Preview the first rows
head(ged_df, 5)
# Bar chart of the total number of tracks per album, coloured by artist.
#
# `data` carries track-level columns (track_id, track_name), so the album-level
# `tracks.total` value is repeated on every row of an album. Plotting the raw
# rows stacks that value once per row and overstates the total, so the rows are
# first reduced to one per album id.
#
# data: data frame with columns id, album_name, artist_name and tracks.total.
# Returns the ggplot object.
plot_total_tracks <- function(data) {
  album_totals <- distinct(data, id, .keep_all = TRUE)
  ggplot(
    album_totals,
    aes(x = album_name, y = tracks.total, fill = artist_name)
  ) +
    geom_col(color = "black", alpha = 0.8) +
    scale_fill_brewer(palette = "Paired") +
    labs(
      x = "Album Name", y = "Total Tracks", title = "Total Tracks by Album",
      subtitle = "Grouped by Artist Name", fill = "Artist Name"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 18, face = "bold"),
      plot.subtitle = element_text(size = 14),
      axis.title = element_text(size = 12, face = "bold"),
      axis.text = element_text(size = 10),
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10)
    )
}

# Plot with all rows, including those with missing values
plot_total_tracks(df)

# Many rows contain NA values because only limited album data is available for
# Harry Styles, so rows with any missing value are dropped.
df <- na.omit(df)
head(df, 5)

# Same plot after removing the incomplete rows
plot_total_tracks(df)
unique(df$id)
## [1] "5r36AJ6VOJtp00oxSkBZ5h" "7xV2TzoaVc0ycW7fwBwAml" "1FZKIm3JVDCxTchXDo5jOV"

# Fill colours for the per-album density curves. The first three entries are
# the colours used so far; the extra entries are only picked up when more than
# three albums are present, where a three-colour manual scale would fail.
album_fill_colours <- c(
  "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999"
)

# Density plot of one audio feature, with one filled curve per album.
#
# data:    data frame with an album_name column and the feature column.
# feature: name of the numeric column to plot, as a string.
# label:   feature name as it should appear in the plot title.
# Returns the ggplot object.
plot_album_density <- function(data, feature, label) {
  ggplot(data, aes(x = .data[[feature]], fill = album_name)) +
    geom_density(alpha = 0.4) +
    scale_fill_manual(values = album_fill_colours) +
    labs(
      title = paste0("Distribution of ", label, " by Album"),
      x = "Value", y = "Density"
    ) +
    theme_minimal()
}

plot_album_density(df, "danceability", "Danceability")
plot_album_density(df, "valence", "Valence")
plot_album_density(df, "loudness", "Loudness")
plot_album_density(df, "tempo", "Tempo")
# Stacked row counts per album, split by key/mode
df %>%
  ggplot(aes(x = album_name, fill = key_mode)) +
  geom_bar(position = "stack", alpha = 0.8) +
  labs(
    title = "Key Note Distribution by Album",
    x = "Album",
    y = "Count",
    fill = "Key Note"
  ) +
  theme_minimal()
# Audio features for Lady Gaga, printed for inspection
gaga <- get_artist_audio_features(artist = "Lady Gaga")
gaga
# Genres for which the top artists are looked up
genres <- c("rock", "pop", "hip-hop", "electronic", "country")

# Build one small data frame per genre (genre label + artist name) and stack
# them once, instead of growing a data frame with rbind() inside a loop.
# A genre that returns no artists is skipped; data.frame() would otherwise
# fail on the length mismatch between the genre label and an empty name vector.
# NOTE: this reuses the name `df`, replacing the Harry Styles data frame above.
genre_artist_list <- lapply(genres, function(genre) {
  artists <- get_genre_artists(genre = genre, limit = 5)
  if (is.null(artists) || NROW(artists) == 0) {
    return(NULL)
  }
  data.frame(genre = genre, artist = artists$name)
})
df <- bind_rows(genre_artist_list)

# Per-genre subsets of the combined table
rock_data <- df[df$genre == "rock", ]
pop_data <- df[df$genre == "pop", ]
hiphop_data <- df[df$genre == "hip-hop", ]
electronic_data <- df[df$genre == "electronic", ]
country_data <- df[df$genre == "country", ]
rock_data

# Bar chart of the listed artists, coloured by genre. geom_bar() counts rows,
# so the value axis is a row count per artist; no play counts are available
# in this table, hence the axis is labelled "Count".
ggplot(df, aes(x = artist, fill = genre)) +
  geom_bar() +
  coord_flip() +
  labs(x = "Artist", y = "Count", title = "Top 5 Artists by Genre")
library(wordcloud)
# For every genre, look up to 10 artists and draw a word cloud built from the
# names of each artist's top tracks.
for (genre in genres) {
  artists <- get_genre_artists(genre = genre, limit = 10)
  for (i in seq_len(NROW(artists))) {
    top_tracks <- get_artist_top_tracks(artists$id[[i]])
    track_names <- top_tracks$name
    # Nothing to draw when an artist has no top tracks
    if (length(track_names) == 0) {
      next
    }
    wordcloud(
      track_names,
      scale = c(3, 0.5), min.freq = 1, max.words = 15,
      random.order = FALSE, colors = brewer.pal(8, "Dark2")
    )
    # The artist name is already part of the genre search result, so no extra
    # get_artist() request is needed for the title.
    title(paste0("Word cloud for ", genre, " artist ", artists$name[[i]]))
  }
}
## Loading required namespace: tm
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
# Billie Eilish: audio features restricted to the "album" group
billie_eilish <- get_artist_audio_features(
  artist = "Billie Eilish",
  include_groups = "album",
  return_closest_artist = TRUE,
  dedupe_albums = TRUE,
  market = NULL,
  authorization = access_token
)
bi_id <- "6qqNVTkY8uBg9cP3Jd7DAH"
# Album listing for the artist (first 20 entries across all release groups)
bi <- get_artist_albums(
  id = bi_id,
  include_groups = c("album", "single", "appears_on", "compilation"),
  market = NULL,
  limit = 20,
  offset = 0,
  authorization = access_token,
  include_meta_info = FALSE
)
# Call the get_albums function for every album id in `bi` and combine all
# resulting data frames into a single data frame using the bind_rows function.
# The list is keyed by album id; unique() keeps one entry per id.
album_list1 <- lapply(
  setNames(nm = unique(bi$id)),
  function(album_id) get_albums(album_id, authorization = access_token)
)
bi_albums_df <- bind_rows(album_list1)
# Rename the album key so both tables share an `id` column
billie_eilish <- dplyr::rename(billie_eilish, id = album_id)
# Full join of the audio features with the album records on the album id
bi_merged_df <- billie_eilish %>%
  full_join(bi_albums_df, by = "id")
# Preview the result and list the distinct album ids
head(bi_merged_df, 5)
unique(bi_merged_df$id)
## [1] "0JGOiO34nwfUdDrD612dOp" "5tzRuO6GP7WRvP3rEOPAO9" "0S0KGZnfBGSIssfF54WSJh"
## [4] "1YPWxMpQEC8kcOuefgXbhj" "2kzPJWrTjVKEYWWhowXLnz" "5lDUpb6zBr4xBh9AGUXYtQ"
## [7] "2AyexmwyUy1nZfBadyewL7" "3ZuV4xSFJnWDncgMICfFmX" "4E8puNI8tw7cXz6YJkwMew"
## [10] "5G58VVE9ub1KE01Mvbd8XM" "3oxhQpF3Twbkl18oQYfnh5" "5sXSHscDjBez8VF20cSyad"
## [13] "4i3rAwPw7Ln2YrKDusaWyT" "6lMlX68jJrx67hiCqdiDvW" "0LgnntyagLdfW5Dz2OSYHU"
## [16] "0ifM8RTX9HjtCJtY9452bW" "2sBB17RXTamvj7Ncps15AK" "1Z0XtKcevvITZ5ydimkYcx"
## [19] "5m9lO9SriYMPpXTrVIU8P5" "5XRJoC2QtsNbAubsCrBBbG"
# Drop the duplicated join columns and the URL, market and paging fields
bi_df <- bi_merged_df %>%
  select(-c(
    album_type.x, album_release_date_precision, analysis_url,
    available_markets.x, explicit, track_href, is_local,
    external_urls.spotify.y,
    tracks.href, tracks.limit, tracks.next, tracks.offset, tracks.previous,
    artists.x, disc_number, type.x, track_uri, external_urls.spotify.x,
    album_type.y, available_markets.y, copyrights, href, external_ids.upc,
    uri, name, label, images, type.y, tracks.total, track_preview_url,
    release_date_precision
  ))
# Group by album id and order the rows from most to least popular.
# arrange() ignores grouping unless .by_group = TRUE, so this is one overall
# ordering rather than a sort within each album.
bi_ged_df <- arrange(group_by(bi_df, id), desc(popularity))
# Inspect the ordered data frame
bi_ged_df
# First album name in popularity order
b_alb <- bi_ged_df$album_name %>%
  unique() %>%
  head(1)
b_alb
## [1] "WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?"
# Rows belonging to that album
b_alb_df <- subset(bi_ged_df, album_name %in% b_alb)
b_alb_df
# Taylor Swift: audio features restricted to the "album" group
taylor_swift <- get_artist_audio_features(
  artist = "Taylor Swift",
  include_groups = "album",
  return_closest_artist = TRUE,
  dedupe_albums = TRUE,
  market = NULL,
  authorization = access_token
)
ts_id <- "06HL4z0CvFAxyc27GXpf02"
# Album listing for the artist (first 20 entries across all release groups)
ts <- get_artist_albums(
  id = ts_id,
  include_groups = c("album", "single", "appears_on", "compilation"),
  market = NULL,
  limit = 20,
  offset = 0,
  authorization = access_token,
  include_meta_info = FALSE
)
# Detailed album record for every id in `ts`, collected in a list keyed by
# album id (one entry per distinct id)
album_list3 <- lapply(
  setNames(nm = unique(ts$id)),
  function(album_id) get_albums(album_id, authorization = access_token)
)
# Stack the per-album data frames into one data frame
ts_albums_df <- bind_rows(album_list3)
# Rename the album key so both tables share an `id` column
taylor_swift <- dplyr::rename(taylor_swift, id = album_id)
# Full join of the audio features with the album records on the album id
ts_merged_df <- taylor_swift %>%
  full_join(ts_albums_df, by = "id")
# Preview the result and list the distinct album ids
head(ts_merged_df, 5)
unique(ts_merged_df$id)
## [1] "3lS1y25WAhcqJDATJK70Mq" "4894htPwC6zoiuTqUQwn4I" "151w1FgRZfnKZA9FEcg9Z3"
## [4] "4moVP48t9bji7djUc5VOvi" "6kZ42qRrzov54LcAk4onW9" "6x9s2ObPdpATZgrwxsk9c0"
## [7] "4hDok0OAJd57SGIT8xuWJH" "6AORtDjduMM3bupSWzbTSG" "1DT6fDJL6AWPJxe7Lq1dPb"
## [10] "2Xoteh7uEpea4TohMxjtaq" "40zKHK0aGM4PITqPy5vfQh" "5jmVg7rwRcgd6ARPAeYNSm"
## [13] "0PZ7lAru5FDFHuirTkWe9Z" "3VaaZ7OIbGLi60NVsnueoo" "1pzvBxYgT6OVwJLtHkrdQK"
## [16] "7v7pe5vZQPWB5zW0JrKRiw" "2fenSS68JI1h4Fo296JfGr" "0xS0iOtxQRoJvfcFcJA5Gv"
## [19] "1NAmidJlEaVgA3MpcPFYGq" "1MHuZZrGT36cXLxAQ5cLP3" "6DEjYFkNZh67HP7R9PSZvv"
## [22] "1Hrs3jLGexOvBoaPMoOQYJ" "1MPAXuTVL2Ej5x0JHiSPq8" "0bEySlRAkuPxV9KVWhXXBr"
## [25] "2QJmrSgbdM35R67eoGQo4j" "4uUAUqIfSomFTbbjGp3TYp" "6EsTJnpahwW6xX20zvqQgZ"
## [28] "0L3oaZUj3loxosjvugCLGG" "7N3xz2HFNVH5BEkm8ZVmoR" "02H4kc9YLgorpUIREOwa0q"
## [31] "34OkZVpuzBa9y40DCy0LPR" "5LVuISYu2g4YbyxYhFPIbK" "4R3hKmiJWEjRe6l03DoV9t"
## [34] "5fy0X0JmZRZnVa2UEicIOl" "1yGbNOtRIgdIiGHOEBaZWf" "1EoDsNmgTLtmwe1BDAVxV5"
## [37] "1KlU96Hw9nlvqpBPlSqcTV" "1KVKqWeRuXsJDLTW0VuD29" "4jTYApZPMapg56gRycOn0D"
## [40] "63lVCnv8B30qedCiTlAc9J" "7daMnnffzVSbNJj8Dy75Ev" "11gfxXxJPd3j6sdWUyEA5S"
## [43] "4ErTrymYK8VIBQR8J8Hjy1" "6fyR4wBPwLHKcRtxgd4sGh" "5MfAxS5zz8MlfROjGQVXhy"
## [46] "6Ar2o9KCqcyYF9J0aQP3au" "75N0Z60SNMQbAPYZuxKgWd" "3QXlUpSDgakWZK2WqQv0pF"
## [49] "1BdjHo5IR6twMhJDxzlpLt" "5EpMjweRD573ASl7uNiHym" "6GPyXXND6hIZpd9bRhCsFv"
## [52] "6S6JQWzUrJVcJLK4fi74Fw" "3Mvk2LKxfhc2KVSnDYC40I" "6tgMb6LEwb3yj7BdYy462y"
## [55] "08CWGiv27MVQhYpuTtvx83" "2dqn5yOQWdyGwOpOIi9O4x" "3EzFY9Rg0PpbADMth746zi"
## [58] "1CYlmaXajTC59VJWSSeE7Y" "6vRfYCQ1mKKfnB6D7R4N5p" "2gP2LMVcIFgVczSJqn340t"
## [61] "43OpbkiiIxJO8ktIB777Nn" "1ycoesYxIFymXWebfmz828" "1rwH2628RIOVM3WMwwO418"
## [64] "5eyZZoQEFQWRHkV2xgAeBw" "7mzrIsaAjnXihW3InKjlC3" "2rU7u7C2v5i45MFVxx7xG1"
## [67] "1mFGeuBwVfAyli6aDoy9OI" "1ymIvQpnPQBj1lGlJRqrFQ"
# Drop the duplicated join columns and the URL, market and paging fields
ts_df <- ts_merged_df %>%
  select(-c(
    album_type.x, album_release_date_precision, analysis_url,
    available_markets.x, explicit, track_href, is_local,
    external_urls.spotify.y,
    tracks.href, tracks.limit, tracks.next, tracks.offset, tracks.previous,
    artists.x, disc_number, type.x, track_uri, external_urls.spotify.x,
    album_type.y, available_markets.y, copyrights, href, external_ids.upc,
    uri, name, label, images, type.y, tracks.total, track_preview_url,
    release_date_precision
  ))
# Group by album id and order the rows from most to least popular.
# arrange() ignores grouping unless .by_group = TRUE, so this is one overall
# ordering rather than a sort within each album.
ts_ged_df <- arrange(group_by(ts_df, id), desc(popularity))
# Inspect the ordered data frame
ts_ged_df
# First album name in popularity order
ts_alb <- ts_ged_df$album_name %>%
  unique() %>%
  head(1)
ts_alb
## [1] "Midnights"
# Linkin Park: audio features restricted to the "album" group
Linkin_Park <- get_artist_audio_features(
  artist = "Linkin Park",
  include_groups = "album",
  return_closest_artist = TRUE,
  dedupe_albums = TRUE,
  market = NULL,
  authorization = access_token
)
LP_id <- "6XyY86QOPPrYVGvF9ch6wz"
# Album listing for the artist (first 20 entries across all release groups)
LP_albums <- get_artist_albums(
  id = LP_id,
  include_groups = c("album", "single", "appears_on", "compilation"),
  market = NULL,
  limit = 20,
  offset = 0,
  authorization = access_token,
  include_meta_info = FALSE
)
# Detailed album record for every id in `LP_albums`, collected in a list keyed
# by album id (one entry per distinct id)
album_list2 <- lapply(
  setNames(nm = unique(LP_albums$id)),
  function(album_id) get_albums(album_id, authorization = access_token)
)
# Stack the per-album data frames into one data frame
LP_albums_df <- bind_rows(album_list2)
# Rename the album key so all three tables share an `id` column
Linkin_Park <- dplyr::rename(Linkin_Park, id = album_id)
# Combine detailed album records, album listing and audio features on the id
LP_merged_df <- LP_albums_df %>%
  full_join(LP_albums, by = "id") %>%
  full_join(Linkin_Park, by = "id")
# Drop the duplicated join columns and the URL, market and paging fields
LP_df <- LP_merged_df %>%
  select(-c(
    name.x, release_date.x, total_tracks.x, uri.x, type.x,
    artists.y, album_group.x, album_type.x, artists.x, images.x,
    external_ids.upc, available_markets.x, href.x,
    external_urls.spotify.x, release_date_precision.x,
    album_group.y, album_type.y, available_markets.y, copyrights,
    href.y, images.y, name.y, release_date.y,
    release_date_precision.y, total_tracks.y, type.y, uri.y,
    external_urls.spotify.y,
    tracks.href, tracks.limit, tracks.next, tracks.offset, tracks.previous,
    analysis_url, available_markets, explicit, disc_number, track_href,
    is_local, track_preview_url, type, track_uri, external_urls.spotify
  ))
# Group by album id and order the rows from most to least popular.
# arrange() ignores grouping unless .by_group = TRUE, so this is one overall
# ordering rather than a sort within each album.
LP_grouped_df <- arrange(group_by(LP_df, id), desc(popularity))
# First album name in popularity order
lp_alb <- LP_grouped_df$album_name %>%
  unique() %>%
  head(1)
lp_alb
## [1] "Meteora 20th Anniversary Edition"
# Eminem: audio features restricted to the "album" group
eminem <- get_artist_audio_features(
  artist = "Eminem",
  include_groups = "album",
  return_closest_artist = TRUE,
  dedupe_albums = TRUE,
  market = NULL,
  authorization = access_token
)
em_id <- "7dGJo4pcD2V6oG8kP0tJRR"
# Album listing for the artist (first 20 entries across all release groups)
em_albums <- get_artist_albums(
  id = em_id,
  include_groups = c("album", "single", "appears_on", "compilation"),
  market = NULL,
  limit = 20,
  offset = 0,
  authorization = access_token,
  include_meta_info = FALSE
)
# Detailed album record for every id in `em_albums`, collected in a list keyed
# by album id (one entry per distinct id)
album_list4 <- lapply(
  setNames(nm = unique(em_albums$id)),
  function(album_id) get_albums(album_id, authorization = access_token)
)
# Stack the per-album data frames into one data frame
em_albums_df <- bind_rows(album_list4)
# Rename the album key so all three tables share an `id` column
eminem <- dplyr::rename(eminem, id = album_id)
# Combine detailed album records, album listing and audio features on the id
em_merged_df <- em_albums_df %>%
  full_join(em_albums, by = "id") %>%
  full_join(eminem, by = "id")
# Drop the duplicated join columns and the URL, market and paging fields
em_df <- em_merged_df %>%
  select(-c(
    name.x, release_date.x, total_tracks.x, uri.x, type.x,
    artists.y, album_group.x, album_type.x, artists.x, images.x,
    external_ids.upc, available_markets.x, href.x,
    external_urls.spotify.x, release_date_precision.x,
    album_group.y, album_type.y, available_markets.y, copyrights,
    href.y, images.y, name.y, release_date.y,
    release_date_precision.y, total_tracks.y, type.y, uri.y,
    external_urls.spotify.y,
    tracks.href, tracks.limit, tracks.next, tracks.offset, tracks.previous,
    analysis_url, available_markets, explicit, disc_number, track_href,
    is_local, track_preview_url, type, track_uri, external_urls.spotify
  ))
# Group by album id and order the rows from most to least popular.
# arrange() ignores grouping unless .by_group = TRUE, so this is one overall
# ordering rather than a sort within each album.
em_grouped_df <- arrange(group_by(em_df, id), desc(popularity))
# First album name in popularity order
em_alb <- em_grouped_df$album_name %>%
  unique() %>%
  head(1)
em_alb
## [1] "Music To Be Murdered By"
# Keep, for each artist, only the rows of the album selected above as the
# first one in popularity order. The album names come from ts_alb, em_alb,
# lp_alb and b_alb rather than from hard-coded titles, so the filters cannot
# drift out of sync with the computed values; %in% also copes with an NA or
# empty selection by returning no rows instead of NA comparisons.
ts_filtered <- ts_ged_df %>% dplyr::filter(album_name %in% ts_alb)
em_filtered <- em_grouped_df %>% dplyr::filter(album_name %in% em_alb)
lp_filtered <- LP_grouped_df %>% dplyr::filter(album_name %in% lp_alb)
be_filtered <- bi_ged_df %>% dplyr::filter(album_name %in% b_alb)
# Combine the filtered data frames into one combined data frame
combined_df <- bind_rows(ts_filtered, em_filtered, lp_filtered, be_filtered)
# Keep only the columns used by the comparison plots below
combined_df <- combined_df %>%
  dplyr::select(
    artist_name, album_name, id, track_name,
    danceability, valence, speechiness, tempo, key_mode
  )
combined_df
# Box plot of valence per artist.
# The fill legend is switched off because fill and x both map artist_name;
# guides(fill = "none") replaces the deprecated guides(fill = FALSE), which
# raises a deprecation warning as of ggplot2 3.3.4.
boxplot_valence <- ggplot(
  combined_df,
  aes(x = artist_name, y = valence, fill = artist_name)
) +
  geom_boxplot(alpha = 0.6) +
  labs(
    title = "Valence Distribution by Artist",
    x = "Artist Name", y = "Valence Score"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10)
  ) +
  guides(fill = "none")
boxplot_valence
library(tidyverse)
# Per-artist tempo summary: extremes, quartiles and median
stats <- summarise(
  group_by(combined_df, artist_name),
  min_tempo = min(tempo),
  max_tempo = max(tempo),
  q1_tempo = quantile(tempo, probs = 0.25),
  median_tempo = median(tempo),
  q3_tempo = quantile(tempo, probs = 0.75)
)
stats
# Box plot of danceability per artist, one box per artist_name in combined_df.
# The fill legend is suppressed because it would only repeat the x-axis labels.
boxplot_dance <- ggplot(
  combined_df,
  aes(x = artist_name, y = danceability, fill = artist_name)
) +
  geom_boxplot(alpha = 0.6) +
  labs(
    title = "Danceability Distribution by Artist",
    x = "Artist Name", y = "Danceability Score"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10)
  ) +
  # guides(fill = FALSE) is deprecated since ggplot2 3.3.4; "none" is the
  # supported way to hide a guide and avoids the lifecycle warning.
  guides(fill = "none")
boxplot_dance
# Box plot of speechiness per artist, one box per artist_name in combined_df.
# The fill legend is suppressed because it would only repeat the x-axis labels.
boxplot_speech <- ggplot(
  combined_df,
  aes(x = artist_name, y = speechiness, fill = artist_name)
) +
  geom_boxplot(alpha = 0.6) +
  labs(
    title = "Speechiness Distribution by Artist",
    x = "Artist Name", y = "Speechiness Score"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10)
  ) +
  # guides(fill = FALSE) is deprecated since ggplot2 3.3.4; "none" is the
  # supported way to hide a guide and avoids the lifecycle warning.
  guides(fill = "none")
boxplot_speech
# Box plot of tempo per artist, one box per artist_name in combined_df.
# The fill legend is suppressed because it would only repeat the x-axis labels.
boxplot_tempo <- ggplot(
  combined_df,
  aes(x = artist_name, y = tempo, fill = artist_name)
) +
  geom_boxplot(alpha = 0.6) +
  labs(
    title = "Tempo Distribution by Artist",
    x = "Artist Name", y = "Tempo Score"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10)
  ) +
  # guides(fill = FALSE) is deprecated since ggplot2 3.3.4; "none" is the
  # supported way to hide a guide and avoids the lifecycle warning.
  guides(fill = "none")
boxplot_tempo
# Fill palette for the key/mode categories (23 hex colours)
custom_colors1 <- c(
  "#FF0000", "#FF7F00", "#FFFF00", "#7FFF00", "#00FF00", "#00FF7F",
  "#00FFFF", "#007FFF", "#0000FF", "#7F00FF", "#FF00FF", "#FF007F",
  "#8B0000", "#FF4500", "#FFD700", "#ADFF2F", "#32CD32", "#66CDAA",
  "#00CED1", "#4682B4", "#483D8B", "#9400D3", "#FF1493"
)
# Stacked bar chart: number of tracks per artist, split by key_mode.
# "grey" is appended to the palette as a 24th fill value.
key_plot <- ggplot(
  data = combined_df,
  mapping = aes(x = artist_name, fill = key_mode)
) +
  geom_bar(position = "stack", alpha = 0.8) +
  labs(
    title = "Key Distribution by Artist",
    x = "Artist",
    y = "Count",
    fill = "Key Mode"
  ) +
  scale_fill_manual(
    name = "Key Mode",
    values = c(custom_colors1, "grey")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10)
  )
key_plot
library(dplyr)
# Literal table of key/mode counts per artist: one row per
# (artist_name, key_mode) pair, with artists laid out in consecutive runs of
# 10, 12, 21 and 9 rows.
key_data <- data.frame(
  artist_name = rep(
    c("Billie Eilish", "Eminem", "Linkin Park", "Taylor Swift"),
    times = c(10, 12, 21, 9)
  ),
  key_mode = c(
    # Billie Eilish
    "A major", "C major", "C# major", "D# major", "E major", "F minor",
    "F# minor", "G major", "G# major", "G# minor",
    # Eminem
    "A major", "A# minor", "C# major", "D major", "D minor", "D# major",
    "D# minor", "E minor", "F major", "F minor", "F# minor", "G# major",
    # Linkin Park
    "A major", "A minor", "A# major", "A# minor", "B major", "B minor",
    "C major", "C minor", "C# major", "C# minor", "D major", "D# major",
    "D# minor", "E major", "E minor", "F# major", "F# minor", "G major",
    "G minor", "G# major", "G# minor",
    # Taylor Swift
    "A major", "A# major", "C major", "D major", "E major", "E minor",
    "G major", "G minor", "G# major"
  ),
  count = c(
    # Billie Eilish
    1, 2, 1, 1, 2, 1, 1, 2, 1, 2,
    # Eminem
    2, 6, 11, 3, 2, 1, 1, 5, 3, 2, 3, 1,
    # Linkin Park
    8, 1, 1, 9, 4, 1, 1, 1, 16, 8, 4, 1, 6, 4, 5, 5, 4, 2, 1, 3, 3,
    # Taylor Swift
    2, 1, 6, 2, 4, 2, 6, 1, 2
  )
)
# Same rows, grouped by artist and ordered from the largest count down
# (the ordering is across all rows, not within each artist)
key_data_grouped <- arrange(group_by(key_data, artist_name), desc(count))
# Three highest counts within each artist. Ties at the cutoff are kept, so an
# artist can contribute more than three rows.
top_keys <- slice_max(
  key_data_grouped,
  order_by = count,
  n = 3,
  with_ties = TRUE
)
# Show the per-artist top keys
print(top_keys)
## # A tibble: 14 × 3
## # Groups: artist_name [4]
## artist_name key_mode count
## <chr> <chr> <dbl>
## 1 Billie Eilish C major 2
## 2 Billie Eilish E major 2
## 3 Billie Eilish G major 2
## 4 Billie Eilish G# minor 2
## 5 Eminem C# major 11
## 6 Eminem A# minor 6
## 7 Eminem E minor 5
## 8 Linkin Park C# major 16
## 9 Linkin Park A# minor 9
## 10 Linkin Park A major 8
## 11 Linkin Park C# minor 8
## 12 Taylor Swift C major 6
## 13 Taylor Swift G major 6
## 14 Taylor Swift E major 4